package cn.inspur.spark

import org.apache.spark.{SparkConf, SparkContext}

/**
  * Spark batch job that cleans three raw comma-separated JD datasets stored on HDFS
  * (phone list, phone detail and comments).
  *
  * Every dataset goes through the same pipeline: split each line on commas, keep only
  * rows that have the expected number of columns and are not the header row, re-join
  * the columns with the `\u0001` control character and write the result back to HDFS.
  * The comment dataset additionally requires a non-zero integer in column index 9.
  */
object RDD_clear_jd_comment {

  import scala.util.Try

  /** Separator used to split the raw input lines. */
  private val InputSeparator = ","

  /**
    * Separator used when re-joining the cleaned columns (the U+0001 control character).
    * Written as a Unicode escape because octal string escapes such as "\001" are
    * deprecated in Scala; the resulting character is identical.
    */
  private val OutputSeparator = "\u0001"

  /**
    * Entry point: runs the three cleaning jobs one after another.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    list()
    detail()
    comment()
  }

  /**
    * Cleans the comment dataset: keeps rows with exactly 12 columns that are not the
    * header row and whose user-level column (index 9) is a non-zero integer.
    */
  def comment(): Unit =
    cleanDataset(
      appName = "comment",
      inputPath = "hdfs://nd11:9000/e-commerce/jd_comment",
      outputPath = "hdfs://nd11:9000/e-commerce/cleared/comment",
      expectedColumns = 12,
      headerFirstField = "序号",
      extraCheck = hasNonZeroUserLevel
    )

  /** Cleans the phone-detail dataset: keeps non-header rows with exactly 19 columns. */
  def detail(): Unit =
    cleanDataset(
      appName = "detail",
      inputPath = "hdfs://nd11:9000/e-commerce/jd_phone_detail",
      outputPath = "hdfs://nd11:9000/e-commerce/cleared/detail",
      expectedColumns = 19,
      headerFirstField = "商品名称"
    )

  /** Cleans the phone-list dataset: keeps non-header rows with exactly 7 columns. */
  def list(): Unit =
    cleanDataset(
      appName = "list",
      inputPath = "hdfs://nd11:9000/e-commerce/jd_phone_list",
      outputPath = "hdfs://nd11:9000/e-commerce/cleared/list",
      expectedColumns = 7,
      headerFirstField = "商品id"
    )

  /**
    * Runs one cleaning job in its own SparkContext.
    *
    * The context is stopped in a `finally` block so that a failure in one job does not
    * leave a live context behind.
    *
    * NOTE(review): the master is hard-coded to "local", as in the original code; a
    * master set on the SparkConf takes precedence over one passed at submit time.
    *
    * @param appName          Spark application name
    * @param inputPath        location of the raw comma-separated text
    * @param outputPath       destination directory for the cleaned text
    * @param expectedColumns  exact number of columns a valid row must have
    * @param headerFirstField value of the first column that identifies the header row
    * @param extraCheck       additional dataset-specific predicate; only evaluated for
    *                         rows that already passed the length and header checks
    */
  private def cleanDataset(
      appName: String,
      inputPath: String,
      outputPath: String,
      expectedColumns: Int,
      headerFirstField: String,
      extraCheck: Array[String] => Boolean = _ => true
  ): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName(appName).setMaster("local"))
    try {
      sc.textFile(inputPath)
        // String.split drops trailing empty strings, so a row whose last columns are
        // empty yields fewer fields and is rejected by the length check below.
        .map(_.split(InputSeparator))
        .filter(fields => isCleanRow(fields, expectedColumns, headerFirstField, extraCheck))
        .map(_.mkString(OutputSeparator))
        .saveAsTextFile(outputPath)
    } finally {
      sc.stop()
    }
  }

  /**
    * Decides whether a split row should be kept.
    *
    * Checks run in this order and short-circuit: column count, header marker, then the
    * dataset-specific check. The ordering guarantees that `extraCheck` can index any
    * column below `expectedColumns` safely and never sees the header row.
    */
  private def isCleanRow(
      fields: Array[String],
      expectedColumns: Int,
      headerFirstField: String,
      extraCheck: Array[String] => Boolean
  ): Boolean =
    fields.length == expectedColumns &&
      fields(0) != headerFirstField &&
      extraCheck(fields)

  /**
    * Comment-specific check on the user-level column (index 9): the value must parse
    * as an integer different from 0.
    *
    * A value that is not a valid integer causes the row to be dropped instead of
    * throwing NumberFormatException and aborting the whole job.
    */
  private def hasNonZeroUserLevel(fields: Array[String]): Boolean =
    Try(fields(9).toInt).toOption.exists(_ != 0)

}
